# Replication script:
# Political Competition in Dynamic Economies
# By Sondre Ulvund Solstad
# Article published in the European Journal of Political Economy

# Instructions: 
# 1. Set working directory to one containing all the files in this archive.
# 2. Run Stage 0, then Stage 1
# 3. Replicate any individual component of the analysis by running the relevant block of code. For instance, to replicate table 2, run the lines of code under the heading "Stage 3. Generate Table 2"

# I hope future researchers will find these materials helpful, and expand and improve upon this analysis. All errors are mine. - Sondre Ulvund Solstad, 2023

# Stage 0. Load packages and set options ----------------------------------------------------
library(tidyverse)
library(stargazer)
library(readr)
library(sandwich)
library(countrycode)

# Some packages are not on CRAN. These can be installed in the following way:
# library(devtools)
# install_github("vdeminstitute/vdemdata")
# install_github('sondreus/QuickReg')
# install_github('sondreus/QuickCoefPlot')
# install_github('sondreus/QuickEffectSize')
# install_github('sondreus/coverage')
library(vdemdata)
library(QuickReg)
library(QuickCoefPlot)
library(QuickEffectSize)
library(coverage)

# Stage 1. Load and prepare data ----------------------------------------------------

# Load base data:
cp <- data.frame(read_csv('base_country_year_data.csv'))

# This base data follows Pascali (2018) closely (to save space, the construction of them is omitted here - and interested readers are referred to his original paper for more detail: Pascali, Luigi (2018). The wind of change: Maritime technology, trade and economic development. American Economic Review.

# The variables are as follows:
# country = country name, from Polity project
# pascali_country = country name in Pascali's data (for completeness)
# year = year
# ccode = Correlates of War country code
# region = world region (Americas, Middle East, Europe, Asia and Oceania, Africa)
# ln_exportpc = exports per capita, from Pascali
# lpred_TOTAL_trad_5_ys = Predicted trade, from Pascali
# polity2 = polity2 score from the Polity project dataset
# xconst = xconst score from the Polity project dataset
# sovereign = indicator for whether the country is sovereign
# civilwar_cow_01 = indicator, equal to 1 if there is 1 or more civil war in the country in the year in question according to the Correlates of War Project dataset
# democracy = indicator, from Boix et al 2015
# war = indicator, equal to 1 if the country is engaged in one or more war in the year in question according to the Correlates of War Project dataset
# gdppc_mad = GDP per capita, from the Maddison project
# urban_50c = urbanization indicator, from Pascali

# Restrict data to sovereign countries:
cp <- cp[cp$sovereign == 1 & !is.na(cp$sovereign), ] 

# Order data by year:
cp <- cp[order(cp$year), ]

# Define predicted trade with year and region fixed effects:
cp$pred_new[!is.na(cp$ln_exportpc) & !is.na(cp$lpred_TOTAL_trad_5ys) & !is.na(cp$region)] <- lm(ln_exportpc ~ lpred_TOTAL_trad_5ys + as.factor(year) + as.factor(region), data = cp)$fitted

# Define missed trade (a non-negative quantity):
cp$pred_new <- cp$pred_new + max(cp$ln_exportpc - cp$pred_new, na.rm = TRUE)
cp$miss_new <- cp$pred_new - cp$ln_exportpc
cp$baseline_miss <- ave(cp$miss_new, cp$ccode, FUN = function(x) rep(na.omit(x)[1], length(x)))
cp$baseline_actual <- ave(cp$ln_exportpc, cp$ccode, FUN = function(x) rep(na.omit(x)[1], length(x)))

# Define the regime coalition variable:
cp$xconst[cp$xconst < 0] <- NA
cp$baseline_xconst <- ave(cp$xconst, cp$ccode, FUN = function(x) rep(na.omit(x)[1], length(x)))
cp$xconst <- (cp$xconst - 1)/6

# Define actual trade
cp$actual <- cp$ln_exportpc

# Define coastal support variable
# A: load data from VDEM
vdem$ccode <- vdem$COWcode
cp <- merge(cp, vdem[, c("year", "ccode", "v2regsuploc")], by= c("ccode", "year"), all.x = TRUE)
cp$v2regsuploc[cp$pascali_country == 'Cuba' & cp$year %in% 1890:1910] <- 1 # Data changes after this study and replication materials were first written mean that Cuba is no longer (as of 2023) coded as having coastal support. This does not affect any finding in the paper, but slightly changes some coefficients in table 3 and 4. This replication package recreates the original tables - but should one wish, one can comment out this line to use the new data.

# B. Make factor levels descriptive
cp$support_location <- NA
cp$support_location[cp$v2regsuploc == 0] <- "Abroad"
cp$support_location[cp$v2regsuploc == 1] <- "In the capital"
cp$support_location[cp$v2regsuploc == 2] <- "In urban areas outside the capital"
cp$support_location[cp$v2regsuploc == 3] <- "in rural areas"
cp$support_location[cp$v2regsuploc == 4] <- "The groups are not concentrated in any particular area"
cp$support_location <- factor(cp$support_location, levels = unique(cp$support_location))

# C. Load data on capitals
capitals <- read.csv("capitals.csv", stringsAsFactors = FALSE)
colnames(capitals)[1] <- "pascali_country"
cp <- merge(cp, capitals, by = "pascali_country", all.x = TRUE)

# D. Define coastal support variable
cp <- cp[order(cp$year), ]
cp$coastal_support <- FALSE
cp$coastal_support[cp$support_location %in% c("In the capital") & cp$coastal == TRUE] <- TRUE

# Select first year of the data as base year (to avoid endogenity):
cp$coastal_support[is.na(cp$pred_new)] <- NA 
cp$coastal_support <- ave(cp$coastal_support, cp$ccode, FUN = function(x) rep(na.omit(x)[1], length(x)))

# Define interaction term
cp$coast_sup_interact_xcst <- as.logical(cp$coastal_support)*cp$xconst

# Load polity with one-year lag
polity <- read.csv("p4v2016.csv")
polity <- polity[!(polity$ccode == 816 & polity$year >= 1976), ] # Vietnam fix 
polity$ccode[polity$ccode == 818 ] <- 816                        # Vietnam fix
polity$country[polity$ccode == 816] <- "Vietnam"
polity <- polity[order(polity$year), ]

# I interpolate here because transition years often are missing. Uses the same formula as in the polity codebook (linear interpolation for -88).
linear_interpolation <- function(x){
  if(any(is.na(x)) & any(!is.na(x))){
    for(i in which(is.na(x))[which(is.na(x)) > min(which(!is.na(x)))]){
      x_low <- NA
      ind_low <- 1
      while(is.na(x_low) & ind_low < 10){
        x_low <- x[i-ind_low]
        ind_low <- ind_low + 1
      }
      x_high <- NA
      ind_high <- 1
      while(is.na(x_high) & ind_high < 10){
        x_high <- x[i+ind_high]
        ind_high <- ind_high + 1
      }
      if(is.na(x_low) | is.na(x_high)){
        x[i] <- NA
      } else {
        x[i] <- seq(from = x_low, to = x_high, 
                    length.out = ind_high - ind_low+3)[ind_low]
      }
    }
  }
  return(x)}

# These lines move from polity coding (negative values for missingness to native R ('NA')) and linearly interpolate missing values
polity$xconst[polity$xconst %in% c(-88)] <- NA
polity$xconst_missing <- polity$xconst %in% c(-77, -66)
polity <- polity[order(polity$year), ]
polity$xconst <- ave(polity$xconst, polity$ccode, FUN = function(x) linear_interpolation(x))

# These lines generate one-year lagged variables
polity$xconst_1_year_lag <- ave(polity$xconst, polity$ccode, FUN = function(x) c(NA, x)[1:length(x)])
polity$xconst_1_year_lag[polity$xconst_missing] <- NA
polity$xconst_missing_1_year_lag <- ave(polity$xconst_missing, polity$ccode, FUN = function(x) c(NA, x)[1:length(x)])
polity$xconst_1_year_lag[polity$xconst_missing_1_year_lag] <- NA

# These lines generate five-year lagged variables
polity$xconst_5_year_lag <- ave(polity$xconst, polity$ccode, FUN = function(x) c(NA, NA, NA, NA, NA, x)[1:length(x)])
polity$polity2_1_year_lag <- ave(polity$polity2, polity$ccode, FUN = function(x) c(NA, x)[1:length(x)])
polity$polity2_5_year_lag <- ave(polity$polity2, polity$ccode, FUN = function(x) c(NA, NA, NA, NA, NA, x)[1:length(x)])

polity$polity2_1_year_lag[cp$polity2_1_year_lag < -10] <- NA
polity$polity2_1_year_lag <- (10+polity$polity2_1_year_lag)/20

# This generates the "coalition expansion" variable
polity$delta_xconst_up <- polity$xconst - polity$xconst_1_year_lag > 0

# This merges the polity data into the main dataset
cp <- merge(cp, polity[, c("ccode", "year", "delta_xconst_up", "xconst_1_year_lag", "polity2_1_year_lag", "xconst_5_year_lag", "polity2_5_year_lag", "xrreg", "xrcomp", "xropen",  "parreg", "parcomp", "exrec", "exconst", "polcomp", "polity")], all.x = TRUE)

# This ensures variables are on a 0-1 scale:
cp$xconst_1_year_lag <- (cp$xconst_1_year_lag -1)/6 
cp$xconst_5_year_lag <- (cp$xconst_5_year_lag -1)/6 
cp$xconst_5_year_lag[is.na(cp$xconst_5_year_lag)] <- cp$xconst_1_year_lag[is.na(cp$xconst_5_year_lag)]

# This generates a xconst-minus-baseline variable
cp$xconst_minus_baseline <- cp$xconst - ((cp$baseline_xconst-1)/6)

# This makes the coastal support variable numeric, and hard-codes interaction variables
cp$coastal_support <- as.numeric(cp$coastal_support)
cp$coast_sup_interact_miss_new <- cp$coastal_support*cp$miss_new
cp$coalitions_interact_coastal_support <- cp$xconst*as.logical(cp$coastal_support)

# Restrict the analysis to sovereign countries:
cp <- cp[cp$sovereign == 1 & !is.na(cp$sovereign), ]

# Define predicted trade with year and region fixed effects:
cp$pred_new[!is.na(cp$ln_exportpc) & !is.na(cp$lpred_TOTAL_trad_5ys) & !is.na(cp$region)] <- lm(ln_exportpc ~ lpred_TOTAL_trad_5ys + as.factor(year) + as.factor(region), data = cp)$fitted

# This defines missed trade (a non-negative quantity)
cp$pred_new <- cp$pred_new + max(cp$ln_exportpc - cp$pred_new, na.rm = TRUE)
cp$miss_new <- cp$pred_new - cp$ln_exportpc
cp$baseline_miss <- ave(cp$miss_new, cp$ccode, FUN = function(x) rep(na.omit(x)[1], length(x)))
cp$baseline_actual <- ave(cp$ln_exportpc, cp$ccode, FUN = function(x) rep(na.omit(x)[1], length(x)))

# This ensures any polity components which are missing are coded as such (moves from polity coding to native R coding)
for(i in c("xrreg", "xrcomp", "xropen",  "parreg", "parcomp", "exrec", "exconst", "polcomp", "polity")){
  cp[, i][cp[, i] < 1 ] <- NA
}

# The following lines generate a version of polity excepting the contribution of xconst - i.e. the score implied by the other components of the polity2 score.  
# Note: XROPEN is coded conditionally on other variables.

# Then removing:
# A. Direct contribution of exconst
cp$polity2 <- cp$polity2 * 20 - 10 
cp$polity2_sans_xconst <- cp$polity2 + ifelse(cp$exconst >= 4, -cp$exconst+3, 4-cp$exconst)
cp$polity2_sans_xconst[cp$exconst == 0 | is.na(cp$exconst)] <- cp$polity2[cp$exconst == 0]

# B. Other conditionally coded variables
cp$polity2_sans_xconst[cp$exconst %in% 2:3 & cp$xropen %in% 3:4] <- cp$polity2_sans_xconst[cp$exconst %in% 2:3 & cp$xropen %in% 3:4] - 1

cp$polity2 <- (cp$polity2 + 10)/20 
cp$polity2_sans_xconst <- (cp$polity2_sans_xconst + 10)/20

# Subsetting to relevant columns:
cp <- cp[, c("year",                      
             "region", 
             "country",
             "pascali_country",
            "sovereign", 
            "democracy",                
            "coastal_support",
            "xconst",
             "ln_exportpc",               
            "pred_new",
            "miss_new",                  
            "baseline_actual",
            "baseline_miss",             
            "civilwar_cow_01",           
            "war",
            "coalitions_interact_coastal_support",  
            "ccode",                     
            "delta_xconst_up",
            "coast_sup_interact_miss_new",
            "actual",
            "urban_50pc",
            "xconst_1_year_lag",         
            "polity2",                 
            "polity2_sans_xconst")]

# This orders the dataset
cp <- cp[order(cp$year), ]

# This writes it to a file
write_csv(cp, 'country_year_data.csv')

## ------- 
## This generates the data used to construct Figure 6:

# Load WDI data:
d <- read.table("WDI-1950-2015.txt")
d$iso3c <- countrycode(d$ccode, 'cown', 'iso3c')
d <- d[!duplicated(paste0(d$ccode, "_", d$year)), ]

# Load and merge Economic complexity data
d$log_pop <- log(d$population)
d$log_gdppc <- log(d$GDP.per.capita)

atlas <- read.csv("econ_complexity.csv")
atlas$ccode <- countrycode(atlas$country, "country.name", "cown")

d <- merge(d, atlas[, c('eci_value', 'year', 'ccode')], by = c("ccode", "year"), all.x = TRUE)

# Load and merge Polity data
polity <- read.csv("p4v2016.csv")
d <- merge(d, polity, by = c('ccode', 'year'), all.x= T)

write_csv(d[, c('year', 'log_gdppc', 'ccode', 'log_pop', 'eci_value', 'polity2')], 'country_year_data_econ_complexity_1.csv')

# ------- 
# This generates the data used to construct Figure 8:

# Loading data
## Polity IV data
econ_complexity_2 <- read.csv("p4v2016.csv")

## Economic Complexity Data
require(countrycode)
econ.complexity <- read.csv("econ_complexity.csv")
econ.complexity$ccode <- countrycode(econ.complexity$country, "country.name", "cown")
econ.complexity <- econ.complexity[,c(1,2,4,5,6,7)]

econ_complexity_2 <- merge(econ_complexity_2, econ.complexity, by = c("ccode", "year"), all = TRUE)

## WDI indicators 
w <- read.table("WDI-1950-2015.txt")
colnames(w)[2] <- "World Bank Country Name"
econ_complexity_2 <- merge(econ_complexity_2, w, by=c("ccode", "year"), all = TRUE)

## Lagging and leading political regime variable
# Putting it on 0-1 scale for simplicity of interpretation
econ_complexity_2$polity2.01 <- (econ_complexity_2$polity2 + 10) / 20
require(DataCombine)
econ_complexity_2 <- econ_complexity_2[order(econ_complexity_2$year),]

for (k in c(-5:-1,1:20)){
  econ_complexity_2 <- slide(econ_complexity_2, Var = "polity2.01", TimeVar = "year", GroupVar = "ccode", NewVar=paste0("polity2.01", ifelse(k > 0, paste0("lag", k), paste0("led", -k))), slideBy = -k, keepInvalid = FALSE, reminder = FALSE) 
}

econ_complexity_2 <- econ_complexity_2[!is.na(econ_complexity_2$eci_value), c("year",  "ccode", "country", "GDP.per.capita", "population",  "polity2", "eci_value", "polity2.01led1", "polity2.01", "polity2.01lag1", "polity2.01lag2", "polity2.01lag3", "polity2.01lag4", "polity2.01lag5", "polity2.01lag6", "polity2.01lag7", "polity2.01lag8", "polity2.01lag9", "polity2.01lag10", "polity2.01lag11", "polity2.01lag12", "polity2.01lag13", "polity2.01lag14", "polity2.01lag15", "polity2.01lag16", "polity2.01lag17", "polity2.01lag18", "polity2.01lag19", "polity2.01lag20")]
econ_complexity_2 <- econ_complexity_2[order(econ_complexity_2$year), ]
write_csv(econ_complexity_2, 'country_year_data_econ_complexity_2.csv')

# ------ Load the datasets:

cp <- data.frame(read_csv('country_year_data.csv'))
econ_complexity_1 <- data.frame(read_csv('country_year_data_econ_complexity_1.csv'))
econ_complexity_2 <- data.frame(read_csv('country_year_data_econ_complexity_2.csv'))

# Stage 2. Generate Table 1 ----------------------------------------------------

summary.stats <- cp[cp$year %in% cp$year[!is.na(cp$ln_exportpc)], c("xconst", "ln_exportpc", "pred_new", "miss_new", "baseline_actual", "baseline_miss", "urban_50pc", "civilwar_cow_01", "war", "year")]

colnames(summary.stats) <- c("Coalitions",
                             "Trade per Capita",
                             "Predicted Trade per Capita",
                             "Missed Opportunity for Trade",
                             "Trade at Baseline",
                             "Missed Opportunity at Baseline",
                             "Urbanization rate", 
                             "Civil War",
                             "War",
                             "Year")

library(stargazer)
stargazer(summary.stats, summary.stat = c("n", "sd", "min", "median","mean", "max"))

# Stage 3. Generate Table 2  ----------------------------------------------------
library(QuickCoefPlot)
library(QuickReg)
QuickReg(iv.vars = c("xconst", "baseline_miss", "baseline_actual", "urban_50pc", "region", "civilwar_cow_01", "war", "gdppc_mad"), 
         iv.vars.names = c("Coalitions", "Baseline Missed Opportunity", "Baseline Actual Trade", "Urbanization Rate", "Primary Energy Consumption", "Civil War", "War", "GDP per capita (Maddison)"),
         dv.vars.names = "Missed Opportunity",
         dv.vars = "miss_new",
         specifications = list(c(1,2),
                               c(1,2,3),
                               c(1,2,3,4),
                               c(1,2,3,4,6,7)),
         fixed.effects = c("year", "region"),
         fixed.effects.names =  c("Year", "Region"),
         cluster = c("ccode", "year"),
         cluster.names = "Country and Year",
         out.name = "hmm.html",
         data = cp)

# Stage 4. Generate Figure 3  ----------------------------------------------------
library(QuickEffectSize)
library(Zelig)
fit <- zelig(miss_new ~ baseline_miss + xconst + as.factor(region) + as.factor(year), data = cp, model = "normal")
p <- qes(fit, iv.var = "xconst", 
         custom.range = c(0,1), 
         sim.n = 50, 
         range.n = 500,
         #    transform.y = function(y) exp(y)*118.45*1.29) # 2018 US Dollars
         transform.y = function(y) exp(y))
p + scale_interact_continuous(breaks = c(0:6/6), labels = c("Exclusive", "", "", "", "", "", "Inclusive"))+ylab("Missed Trade\n(1840 Pounds Sterling per Capita)")+xlab("Coalition")

# Stage 5. Generate Table 3  ----------------------------------------------------
QuickReg(iv.vars = c("xconst", "coastal_support", "coalitions_interact_coastal_support", "baseline_miss", "baseline_actual", "urban_50pc", "ccode", "civilwar_cow_01", "war", "gdppc_mad"), 
         iv.vars.names = c("Coalitions", "Coastal Support", "Coalitions * Coastal Support", "Baseline Missed Opportunity", "Baseline Actual Trade", "Urbanization Rate", "Primary Energy Consumption", "Civil War", "War", "GDP per capita (Maddison)"),
         dv.vars.names = "Missed Opportunity",
         dv.vars = "miss_new",
         specifications = list(c(1:4),
                               c(1:4,5),
                               c(1:4,5,6),
                               c(1:4,5,6,8,9)),
         fixed.effects = c("year", "region"),
         fixed.effects.names =  c("Year", "Region"),
         cluster = c("year", "country"),
         cluster.names = "Country and Year",
         data = cp)

# Stage 5. Generate Figure 4  ----------------------------------------------------
library(QuickEffectSize)
library(Zelig)
fit <- zelig(miss_new ~ baseline_miss + xconst*coastal_support + as.factor(region) + as.factor(year), data = cp, model = "normal")
pdata1 <- qes(fit, iv.var = "xconst", 
              custom.range = c(0,1), 
              sim.n = 50, 
              range.n = 500,
              set.covar = "coastal_support = TRUE",
              # transform.y = function(y) exp(y)*118.45*1.29) # 2018 US Dollars
              transform.y = function(y) exp(y), return.pdata = T)
pdata2 <- qes(fit, iv.var = "xconst", 
              custom.range = c(0,1), 
              sim.n = 50, 
              range.n = 500,
              set.covar = "coastal_support = FALSE",
              # transform.y = function(y) exp(y)*118.45*1.29) # 2018 US Dollars
              transform.y = function(y) exp(y), return.pdata = T)
pdata1$coastal_support <- "Preexisting Coastal Support"
pdata2$coastal_support <- "No Coastal Support"

pdata <- rbind(pdata1, pdata2)

ggplot(pdata, aes(y = miss_new, x = xconst, col = coastal_support))+
  geom_jitter(alpha = 0.05)+
  #geom_jitter(data=pdata[pdata$coastal_support, ], col= "gray", alpha = 0.05)+
  #geom_jitter(data=pdata[!pdata$coastal_support, ], col= "skyblue", alpha = 0.05)+
  theme_classic()+ scale_interact_continuous(breaks = c(0:6/6), labels = c("Small", "", "", "", "", "", "Large"))+ylab("Missed Trade\n(1840 Pounds Sterling per Capita)")+xlab("Coalition")+scale_color_manual(values=c("skyblue", "gray"))+theme(legend.position = "bottom", legend.title = element_blank())+ guides(colour = guide_legend(override.aes = list(alpha = 1)))


# Stage 6. Generate Table 4  ----------------------------------------------------

### TABLE GLM
res <- list()

# Model 1
qcp(glm(delta_xconst_up ~ miss_new + actual + xconst_1_year_lag 
        + baseline_miss + baseline_actual + as.factor(region) + as.factor(year), 
        data = cp, 
        family = "binomial"),
    cluster = c("year", "ccode"), 
    include = c(1:100), save.summary.df = T)
res[[1]] <- qcp.summary.df
rm(qcp.summary.df)

# Model 2
p <- qcp(glm(delta_xconst_up ~ miss_new + actual + xconst_1_year_lag + baseline_miss + baseline_actual + war + civilwar_cow_01 + urban_50pc + as.factor(region) + as.factor(year), data = cp, family = "binomial"),
         cluster = c("year", "ccode"), 
         include = c(1:100), save.summary.df = T)
res[[2]] <- qcp.summary.df
rm(qcp.summary.df)

# Model 3
qcp(glm(delta_xconst_up ~ miss_new+coastal_support+coast_sup_interact_miss_new  + actual + xconst_1_year_lag + baseline_miss + baseline_actual + as.factor(region) + as.factor(year), data = cp, family = "binomial"),
    cluster = c("year", "ccode"), 
    include = c(1:100), save.summary.df = T)
res[[3]] <- qcp.summary.df
rm(qcp.summary.df)

# Model 4
p <- qcp(glm(delta_xconst_up ~ miss_new+coastal_support+coast_sup_interact_miss_new + actual + xconst_1_year_lag + baseline_miss + baseline_actual + war + civilwar_cow_01 + urban_50pc + as.factor(region) + as.factor(year), data = cp, family = "binomial"),
         cluster = c("year", "ccode"), 
         include = c(1:100), save.summary.df = T)
res[[4]] <- qcp.summary.df
rm(qcp.summary.df)

nice_output <- function(myres, skip = c('as.factor(region)', 'as.factor(year)', 'civilwar', 'war')){
  for(i in skip){
    if(sum(grepl(i, myres[, 1], fixed = T)) == 0){
      skip <- setdiff(skip, i)
    } else {
    myres <- myres[!grepl(i, myres[, 1], fixed = T), ]
    }
  }
  myres <- cbind(myres[, 1], paste0(round(myres[, 2], 2), ifelse(myres[, 4] < 0.01, '***', ifelse(myres[, 4] < 0.05, '**', ifelse(myres[, 4] < 0.1, '*', "")))), round(myres[, 3], 2))
  for(i in skip){
    myres <- rbind(myres, c(paste0(i, ' FE'), '', ''))
  }
  colnames(myres) <- c('', 'coef', 'se')
  myres
}
nice_output(myres = res[[1]])
nice_output(myres = res[[2]])
nice_output(myres = res[[3]])
nice_output(myres = res[[4]])

# Stage 7. Generate Figure 5  ----------------------------------------------------
(p <- qes(zelig(delta_xconst_up ~ miss_new + actual + xconst_1_year_lag + baseline_miss + baseline_actual + as.factor(region) + as.factor(year), data = cp, model = "logit"), 
          iv.var = "miss_new",
          set.covar = "year = 1875", 
          sim.n = 100, range.n = 300,
          transform.x = function(x) exp(x),
          custom.range = c(3.4, 4.5) # confirms with other effect size plot
))
p+ylab("Probability of Coalition Expansion")+xlab("Missed Trade in Pounds Sterling")

# Stage 8. Generate Figure 6  ----------------------------------------------------

# This defines a custom function to construct a pretty residual plot:
make_resid_plot <- function(residdata = d, variable = "", xvariable = "polity2", covars = c("as.factor(year)")){
  
  residdata$one <- 1
  covars <- c(covars, "one")
  
  residdata$var <- residdata[, variable]  
  residdata$xvar <- residdata[, xvariable]
  
  residdata$var_resid <- NA
  
  residdata$var_resid[complete.cases(residdata[, c("var", "xvar", setdiff(covars, grep("as.factor", covars, value = T)))])] <- lm(as.formula(paste0("var ~ ", paste0(covars, collapse = "+"))), data = residdata)$resid 
  
  print(summary(residdata$year[!is.na(residdata$var_resid)]))
  
  print(summary(lm(var_resid ~ xvar, data = residdata)))
  
  p <- ggplot(residdata, aes(y=var_resid, x=xvar, col = as.factor(ccode), group = as.factor(ccode)))+geom_jitter(alpha=1)+theme_minimal()+geom_smooth(method = "lm", aes(col="black", group = 1), col = "black")+theme(legend.position = "none")+ylab(variable)+xlab(xvariable)
  print(p)
  
  print(dim(residdata[!is.na(residdata$var_resid), ]))
}

make_resid_plot(residdata = data.frame(econ_complexity_1), variable = "eci_value", xvariable = "polity2", covars = c("as.factor(year)", "log_gdppc", "log_pop"))

# Stage 9. Generate Figure 7  ----------------------------------------------------
library(coverage)
coverage(data=cp[, c('pascali_country', 'year', 'miss_new', 'sovereign')], unitvar='pascali_country', timevar='year', special.NA = 'sovereign')

# Stage 10. Generate Figure 8  ----------------------------------------------------

require(lmtest)
polity2.over.time <- vector()
for(k in -1:20){
  polity2.over.time <- c(polity2.over.time, paste0("polity2.01", ifelse(k > 0, paste0("lag", k), ifelse(k == 0, "", paste0("led", -k)))))}

# Subsetting model data frame to ensure analysis done on same sample
model.subset <- na.omit(econ_complexity_2[, c(polity2.over.time, "GDP.per.capita", "population", "year", "polity2", "eci_value", "ccode", "country")])

# Creating container vectors
coef <- rep(NA, 22)
se <- rep(NA, 22)
index <- 1

for(k in polity2.over.time){
  
  # Estimating model
  model <- lm(eci_value ~ model.subset[,k] + log(population) + log(GDP.per.capita) + as.factor(ccode) + as.factor(year), data = model.subset)
  
  # Robust standard errors
  model <- coeftest(model, vcov. = sandwich)
  
  coef[index] <- model[2,1]
  se[index] <- model[2,2]
  index <- index + 1
}

plot <- NULL
plot$bot.95 <- (coef - 1.96*se)
plot$bot.90 <- (coef - 1.645*se)
plot$est <- (coef) 
plot$top.90 <- (coef + 1.645*se)
plot$top.95 <- (coef + 1.96*se)
plot$year <- seq(-1, 20)
plot <- as.data.frame(plot)

g <- ggplot(plot, aes(x = year, ymin=bot.95, ymax=top.95, y=est)) + geom_vline(xintercept=0, color = "darkgrey", linetype = "longdash") + geom_linerange() + geom_point(size = 2) + theme_bw() + labs(y = "Economic Complexity Index", x = "Time, with 1 Point Change in Polity2 Score (on 0-1 scale) in Year 0")+ stat_smooth(se = FALSE, color = "grey")+geom_hline(yintercept = 0, linetype = "longdash") + geom_linerange(aes(x=year, ymin=bot.90, ymax=top.90), size = 1)
g

# Stage 11. Generate Table A1  ----------------------------------------------------

summary.stats <- cp[cp$year %in% cp$year[!is.na(cp$ln_exportpc)], c("xconst", "ln_exportpc", "pred_new", "miss_new", "baseline_actual", "baseline_miss", "coastal_support", "polity2", "democracy", "urban_50pc", "civilwar_cow_01", "war", "year")]

colnames(summary.stats) <- c("Coalitions",
                             "Trade per Capita",
                             "Predicted Trade per Capita",
                             "Missed Opportunity for Trade",
                             "Trade at Baseline",
                             "Missed Opportunity at Baseline",
                             "Coastal Support",
                             "Polity2 Score",
                             "Democracy indicator (Boix, Miller, and Rosato 2013)",
                             "Economic Development rbanization rate, pop >50k))
",
"Civil War",
"War",
"Year")

library(stargazer)
stargazer(summary.stats, summary.stat = c("n", "sd", "min", "median","mean", "max"))

# Stage 12. Generate Table A2  ----------------------------------------------------
QuickReg(iv.vars = c("xconst", "polity2_sans_xconst", "baseline_miss", "baseline_actual", "urban_50pc", "pec", "civilwar_cow_01", "war"), 
         iv.vars.names = c("Coalitions", "Democracy Score (Polity2), Excepting Xconst", "Baseline Missed Opportunity", "Baseline Actual Trade", "Urbanization Rate", "Primary Energy Consumption", "Civil War", "War"),
         dv.vars.names = "Missed Opportunity",
         dv.vars = "miss_new",
         specifications = list(c(1,2,3),
                               c(1,2,3,4),
                               c(1,2,3,4,5),
                               c(1,2,3,4,5,7,8)),
         fixed.effects = c("year", "region"),
         fixed.effects.names =  c("Year", "Region"),
         cluster = c("ccode", "year"),
         cluster.names = "Country and Year",
         data = cp)


# Stage 13. Generate Table A3  ----------------------------------------------------
QuickReg(iv.vars = c("xconst", "democracy", "baseline_miss", "baseline_actual", "urban_50pc", "pec", "civilwar_cow_01", "war"), 
         iv.vars.names = c("Coalitions", "Democracy Score", "Baseline Missed Opportunity", "Baseline Actual Trade", "Urbanization Rate", "Primary Energy Consumption", "Civil War", "War"),
         dv.vars.names = "Missed Opportunity",
         dv.vars = "miss_new",
         specifications = list(c(1,2,3),
                               c(1,2,3,4),
                               c(1,2,3,4,5),
                               c(1,2,3,4,5,7,8)),
         fixed.effects = c("year", "region"),
         fixed.effects.names =  c("Year", "Region"),
         cluster = c("ccode", "year"),
         cluster.names = "Country and Year",
         data = cp)
